#!/usr/bin/env python3
"""解析傳入文字的語法，並輸出為指定格式"""
import json
from collections import namedtuple
from functools import reduce

from lxml import etree

from . import (
    HTML_BOOLEAN_ATTRIBUTES,
    HTML_VOID_ELEMENTS,
    REGEX_ASCII_WHITESPACES,
    cached_property,
    html_escape,
)


class Parser():
    """Parser that connects a Lexer and a Renderer.

    Processes the input text according to the requested input mode (which
    selects the ``Lexer<Mode>`` class) and output mode (which selects the
    ``Renderer<Mode>`` class).

    The settings are stored on the instance, so one Parser can be created for
    a given configuration and run many times.

    Attributes:
        char_subst_table: a CharSubstTable built from the dict passed to the
            constructor, or None when no dict was given.
        renderer_filter (list): filter rules handed to every renderer.
        imode, omode: modes used by the most recent ``run`` call.
        info (dict): document info collected by the lexer in the last run.
        output: result of the last run, or None when no output mode was given.
    """

    def __init__(self, char_subst_table=None, renderer_filter=None):
        # Anything that is not a dict is ignored rather than rejected.
        self.char_subst_table = CharSubstTable(char_subst_table) if isinstance(char_subst_table, dict) else None
        self.renderer_filter = renderer_filter or []

    def run(self, fh, imode='html', omode='html'):
        """Lex ``fh`` in mode ``imode`` and render the result in mode ``omode``.

        Args:
            fh: input handed to the lexer's ``parse`` method.
            imode (str): input mode; resolved through ``get_lexer``.
            omode (str): output mode; resolved through ``get_renderer``.
                A falsy value skips rendering (only ``self.info`` is produced).

        Returns:
            The rendered output, or None when ``omode`` is falsy.

        Raises:
            TypeError: when a mode does not resolve to a class, because the
                lookup returns None and None is then called.
        """
        self.imode = imode
        self.omode = omode

        lexer = self.get_lexer(imode)(
            char_subst_table=self.char_subst_table,
        )
        renderer = self.get_renderer(omode)(
            filter=self.renderer_filter,
        ) if omode else None

        lexer.parse(fh)
        self.info = lexer.info
        self.output = renderer.render(lexer.calls) if omode else None

        return self.output

    @staticmethod
    def to_camelcase(text, delim='_'):
        """Convert delimited_text to camelCase."""
        return ''.join(w.title() if i else w for i, w in enumerate(text.split(delim)))

    @staticmethod
    def to_uppercamelcase(text, delim='_'):
        """Convert delimited_text to UpperCamelCase."""
        return ''.join(w.title() for w in text.split(delim))

    @staticmethod
    def get_lexer(mode):
        """Return the ``Lexer<Mode>`` class defined in this module, or None.

        The candidate is looked up in the module globals and is only returned
        when it really is a class derived from ``Lexer``.
        """
        lexer = globals().get('Lexer' + Parser.to_uppercamelcase(mode))

        # Explicit check instead of `assert`: assertions are stripped when
        # Python runs with -O, which would let arbitrary globals through.
        if isinstance(lexer, type) and issubclass(lexer, Lexer):
            return lexer

        return None

    @staticmethod
    def get_renderer(mode):
        """Return the ``Renderer<Mode>`` class defined in this module, or None.

        The candidate is looked up in the module globals and is only returned
        when it really is a class derived from ``Renderer``.
        """
        renderer = globals().get('Renderer' + Parser.to_uppercamelcase(mode))

        # Explicit check instead of `assert` (see get_lexer).
        if isinstance(renderer, type) and issubclass(renderer, Renderer):
            return renderer

        return None


# One rendering instruction as stored in ``Lexer.calls``: ``command`` is the
# instruction name (a renderer dispatches it to ``render_<command>``) and
# ``data`` is the payload handed to that method.
# Note that the generated tuple type is named 'LexerCall', not 'Call'.
Call = namedtuple('LexerCall', ('command', 'data'))


class Lexer():
    """Base lexical analyzer.

    Analyzes the syntax of the input text and turns it into a sequence of
    rendering calls.

    Attributes:
        calls (list): the rendering calls produced so far.
        info (dict): information gathered while parsing the text.
    """

    def __init__(self, char_subst_table=None):
        self.calls = []
        self.info = {}
        self.char_subst_table = char_subst_table

    def parse(self, fh):
        """Parse ``fh``; must be implemented by subclasses."""
        raise NotImplementedError

    def _add_call(self, instruction, **kwargs):
        """Append a call named ``instruction`` whose data is ``kwargs``."""
        call = Call(instruction, kwargs)
        self.calls.append(call)

    def _finalize(self):
        """Wrap the collected calls in ``doc_start`` / ``doc_end`` calls.

        Both calls carry ``self.info`` as their data.
        """
        opening = Call('doc_start', self.info)
        self.calls[:0] = [opening]
        closing = Call('doc_end', self.info)
        self.calls.append(closing)


class LexerHtml(Lexer):
    TRANSPARENT_ELEMENTS = {
        'html',
        'body',
    }

    def parse(self, fh):
        """Parse an HTML document from ``fh`` into rendering calls.

        The document is read twice with ``lxml.etree.iterparse``:

        1. A first pass reads document info (``self.info``) from the first
           ``<header>`` that is a direct child of ``<body>``.
        2. After ``fh.seek(0)``, a second pass walks the whole tree and appends
           one call per start tag, end tag and text run to ``self.calls``.

        Character substitution is applied to text only when the document's
        ``data-type`` is ``book`` and its ``版式`` is neither ``今版`` nor
        ``古版``.  Even then it is suspended inside body-level
        ``<header>``/``<footer>``, inside ``<ins>``/``<del>`` and inside any
        element whose ``data-rev`` is ``古版`` or ``今版``.

        Finally the calls are wrapped in ``doc_start``/``doc_end``.

        Args:
            fh: file object to parse.  It must support ``seek(0)`` because it
                is read twice.
        """
        # First pass: find the first header and read the document info.
        self._load_header_info(fh)

        # Initialise character substitution.  `char_subst_skip_elem` is
        #   None    -> substitution is active for upcoming text,
        #   an elem -> suspended until that element ends,
        #   True    -> disabled for the whole document (never reset).
        char_subst_table = None
        if (self.info.get('data-type') == 'book'
                and self.info.get('版式') not in ('今版', '古版')):
            char_subst_skip_elem = None
            if '字元替換' in self.info:
                # Document-level entries override the ones given to the lexer.
                char_subst_table = CharSubstTable({
                    **(self.char_subst_table or {}),
                    **(self.info['字元替換'] or {}),
                })
            else:
                char_subst_table = self.char_subst_table
        else:
            char_subst_skip_elem = True

        def flush_text(node, attr):
            """Emit ``node.<attr>`` as text once, then blank it to avoid repeats."""
            if node is None or node.tag in self.TRANSPARENT_ELEMENTS:
                return
            text = getattr(node, attr)
            if text is None:
                return
            self.add_text(text, char_subst_table if char_subst_skip_elem is None else None)
            setattr(node, attr, None)

        # Second pass: process the HTML tree.
        fh.seek(0)
        parser = etree.iterparse(fh, html=True, encoding='UTF-8',
                                 events=('start', 'end'),
                                 remove_comments=True)
        for event, elem in parser:
            if event == 'start':
                # Text preceding this element is either the previous sibling's
                # .tail or, for a first child, the parent's .text.
                prev = elem.getprevious()
                if prev is not None:
                    flush_text(prev, 'tail')
                else:
                    flush_text(elem.getparent(), 'text')

                if char_subst_skip_elem is None and (
                        (elem.tag in ('header', 'footer') and elem.getparent().tag == 'body')
                        or elem.tag in ('ins', 'del')
                        or elem.attrib.get('data-rev') in ('古版', '今版')):
                    char_subst_skip_elem = elem

                if elem.tag not in self.TRANSPARENT_ELEMENTS:
                    self._emit_elem_start(elem)

            elif event == 'end':
                # Remaining text is either the last child's .tail or, for a
                # childless element, its own .text.
                if len(elem):
                    flush_text(elem[-1], 'tail')
                else:
                    flush_text(elem, 'text')

                if char_subst_skip_elem is elem:
                    char_subst_skip_elem = None

                if elem.tag not in self.TRANSPARENT_ELEMENTS:
                    self._emit_elem_end(elem)

                # Drop content that is no longer needed to release memory.
                # The tail is kept because it is emitted later (see above).
                elem.clear(keep_tail=True)
                while elem.getprevious() is not None:
                    del elem.getparent()[0]

        self._finalize()

    # Tags that get a `data-sec` attribute added (see _resolve_special).
    _HEADING_TAGS = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')

    # Book mode: revision tags rendered as <span data-rev="...">.
    _REVISION_TAGS = {
        'ins': '今版',
        'del': '古版',
    }

    # Book mode: tag -> ordered (class name, command prefix) pairs.  The first
    # class found on the element wins; the emitted commands are
    # `<prefix>_start` and `<prefix>_end`.
    _CLASS_COMMANDS = {
        'aside': (
            ('眉批', 'marginalnote'),
        ),
        'small': (
            ('組排小字', 'parallel_note'),
            ('雙行夾注', 'cutting_note'),
            ('腳注', 'footnote'),
            ('旁注', 'rubynote'),
        ),
        'b': (
            ('陰文', 'whiteonblack'),
            ('圓角陰文', 'round_whiteonblack'),
            ('方外框', 'bordered'),
            ('圓外框', 'round_bordered'),
            ('圓圈', 'circled'),
            ('圓括號', 'parenthesized'),
        ),
    }

    # Book mode: <span> classes that are special inside <small class="組排小字">.
    _PARALLEL_NOTE_SIDES = (
        ('右文', 'parallel_note_right'),
        ('左文', 'parallel_note_left'),
    )

    def _load_header_info(self, fh):
        """Fill ``self.info`` from the first ``<header>`` directly under ``<body>``.

        Reads ``data-type`` from the header, the title from its first ``<h1>``
        child and, for books, further fields from ``<dl class="元資料">``
        children: a title fallback, ``版式`` and the JSON-encoded ``字元替換``
        tables (merged in document order).
        """
        parser = etree.iterparse(fh, html=True, encoding='UTF-8',
                                 events=('end',), tag=['header'],
                                 remove_comments=True)
        for _event, elem in parser:
            if elem.getparent().tag != 'body':
                continue

            data_type = elem.attrib.get('data-type')
            if data_type:
                self.info['data-type'] = data_type

            headings = elem.xpath('h1')
            if headings and 'title' not in self.info:
                self.info['title'] = etree.tostring(
                    headings[0], method='text', encoding='unicode', with_tail=False)

            if self.info.get('data-type') == 'book':
                for child in elem.xpath('dl[contains(concat(" ", normalize-space(@class), " "), " 元資料 ")]'):
                    meta = self.load_dl_key_value(child)

                    # A key that has a <dt> but no <dd> maps to an empty list,
                    # hence IndexError is handled together with KeyError.
                    if 'title' not in self.info:
                        for key in ('標題', '書名', '篇名', '名稱'):
                            try:
                                self.info['title'] = meta[key][0]
                            except (KeyError, IndexError, TypeError):
                                continue
                            break

                    for key in ('版式',):
                        try:
                            self.info[key] = meta[key][0]
                        except (KeyError, IndexError, TypeError):
                            pass

                    for key in ('字元替換',):
                        if key in meta:
                            table = {}
                            for s in meta[key]:
                                table.update(json.loads(s))
                            self.info[key] = table

            # Only the first body-level header is considered.
            break

    def _resolve_special(self, elem):
        """Return ``(start_command, end_command, data)`` for specially handled
        elements, or None when ``elem`` is rendered as a plain HTML tag.

        ``data`` is a freshly built dict on every call, so the start and the
        end call of an element never share their ``attrs`` dict.
        """
        tag = elem.tag

        # Headings without data-sec get one, unless they are the document
        # title, i.e. a child of a body-level <header>.
        if tag in self._HEADING_TAGS and 'data-sec' not in elem.attrib:
            parent = elem.getparent()
            if not (parent.tag == 'header' and parent.getparent().tag == 'body'):
                return ('html_start_tag', 'html_end_tag',
                        {'tag': tag, 'attrs': {**elem.attrib, 'data-sec': tag}})

        if self.info.get('data-type') != 'book':
            return None

        if tag == 'p':
            return ('html_start_tag', 'html_end_tag',
                    {'tag': 'div', 'attrs': {**elem.attrib, 'data-sec': 'p'}})

        if tag in self._REVISION_TAGS:
            return ('html_start_tag', 'html_end_tag',
                    {'tag': 'span', 'attrs': {**elem.attrib, 'data-rev': self._REVISION_TAGS[tag]}})

        class_commands = self._CLASS_COMMANDS.get(tag)
        if class_commands:
            classes = self.classlist(elem)
            for class_name, command in class_commands:
                if class_name in classes:
                    return (f'{command}_start', f'{command}_end',
                            {'attrs': {**elem.attrib}})

        if tag == 'span':
            classes = self.classlist(elem)
            for class_name, command in self._PARALLEL_NOTE_SIDES:
                if class_name in classes:
                    parent = elem.getparent()
                    if parent.tag == 'small' and '組排小字' in self.classlist(parent):
                        return (f'{command}_start', f'{command}_end',
                                {'attrs': {**elem.attrib}})

        return None

    def _emit_elem_start(self, elem):
        """Append the call that opens ``elem``."""
        special = self._resolve_special(elem)
        if special is not None:
            start_command, _end_command, data = special
            self._add_call(start_command, **data)
            return

        if elem.tag in HTML_VOID_ELEMENTS:
            self._add_call('html_startend_tag', tag=elem.tag, attrs=dict(elem.attrib))
            return

        self._add_call('html_start_tag', tag=elem.tag, attrs=dict(elem.attrib))

    def _emit_elem_end(self, elem):
        """Append the call that closes ``elem``; void elements emit nothing."""
        if elem.tag in HTML_VOID_ELEMENTS:
            return

        special = self._resolve_special(elem)
        if special is not None:
            _start_command, end_command, data = special
            self._add_call(end_command, **data)
            return

        self._add_call('html_end_tag', tag=elem.tag, attrs=dict(elem.attrib))

    def add_text(self, text, char_subst_table=None):
        if char_subst_table is None:
            self._add_call('cdata', text=text)
            return

        for s, type_ in char_subst_table.replace(text):
            if type_ == 'text':
                self._add_call('cdata', text=s)

            elif type_ == 'modern':
                self._add_call('html_start_tag', tag='span', attrs={'data-rev': '今版'})
                self._add_call('cdata', text=s)
                self._add_call('html_end_tag', tag='span', attrs={'data-rev': '今版'})

            elif type_ == 'ancient':
                self._add_call('html_start_tag', tag='span', attrs={'data-rev': '古版'})
                self._add_call('cdata', text=s)
                self._add_call('html_end_tag', tag='span', attrs={'data-rev': '古版'})

    @staticmethod
    def parse_dl(elem):
        """按 WHATWG 規範解析 DL 元素的鍵值組

        ref: https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
        """
        def process_dt_dd(elem):
            nonlocal current
            nonlocal seen_dd
            if elem.tag.lower() == 'dt':
                if seen_dd:
                    groups.append(current)
                    current = {'name': [], 'value': []}
                    seen_dd = False
                current['name'].append(elem)
            elif elem.tag.lower() == 'dd':
                current['value'].append(elem)
                seen_dd = True

        groups = []
        current = {'name': [], 'value': []}
        seen_dd = False
        child = next(iter(elem), None)
        grandchild = None

        while child is not None:
            if child.tag.lower() == 'div':
                grandchild = next(iter(child), None)
                while grandchild is not None:
                    process_dt_dd(grandchild)
                    grandchild = grandchild.getnext()
            else:
                process_dt_dd(child)
            child = child.getnext()

        if current['name'] or current['value']:
            groups.append(current)

        return groups

    @staticmethod
    def load_dl_key_value(elem, read_raw=False):
        """將 dl 元素的鍵值組解析為 dict 物件

        Returns:
            dict: {key1: [value1, value2, ...], key2: [value1, value2, ...], ...}
        """
        def get_elem_values(stack, elem):
            data_elems = elem.xpath('data[@value]')
            if len(data_elems) and not read_raw:
                for data_elem in data_elems:
                    if 'value' in data_elem.attrib:
                        stack.append(data_elem.attrib['value'])
                    else:
                        stack.append(etree.tostring(data_elem, method='text', encoding='unicode', with_tail=False))
            else:
                stack.append(etree.tostring(elem, method='text', encoding='unicode', with_tail=False))
            return stack

        groups = LexerHtml.parse_dl(elem)
        metadata = {}

        for group in groups:
            names = reduce(get_elem_values, group['name'], [])
            values = reduce(get_elem_values, group['value'], [])

            for name in names:
                metadata.setdefault(name, [])
                for value in values:
                    metadata[name].append(value)

        return metadata

    @staticmethod
    def classlist(elem):
        """Return the ``class`` attribute of ``elem`` split on ASCII
        whitespace, or an empty list when the attribute is absent."""
        if 'class' not in elem.attrib:
            return []
        return REGEX_ASCII_WHITESPACES.split(elem.attrib['class'])


class Renderer():
    """Base renderer.

    Turns a sequence of rendering calls into text of a particular format.
    Each call ``(instruction, data)`` is dispatched to the method named
    ``render_<instruction>``.

    Attributes:
        filter (list): filter rules given to the constructor.
        output: result of the last ``render`` call (None before the first).
    """

    def __init__(self, filter=None):
        self.output = None
        self.filter = filter or []

    def render(self, calls=None):
        """Render ``calls`` and return the resulting text.

        Raises:
            AttributeError: when a call names an instruction for which no
                ``render_<instruction>`` method exists.
        """
        self._output = []

        for call in calls or ():
            instruction, data = call
            handler = getattr(self, f'render_{instruction}')
            handler(data)

        self._finalize()
        return self.output

    def render_doc_start(self, info=None):
        """Hook called for the ``doc_start`` call; does nothing here."""

    def render_doc_end(self, info=None):
        """Hook called for the ``doc_end`` call; does nothing here."""

    def render_cdata(self, data):
        """Output the text of a ``cdata`` call unchanged."""
        text = data.get('text')
        self._add_output(text if text else '')

    def _add_output(self, content):
        """Append a piece of output."""
        self._output.append(content)

    def _finalize(self):
        """Join the collected pieces into ``self.output``."""
        pieces = self._output
        self.output = ''.join(pieces)


class RendererHtml(Renderer):
    """Renderer producing HTML.

    Every tag passes through ``_hook_filter_tag``, which applies the first
    matching rule of ``self.filter``.  Supported rule actions:

    - ``hide``: move the element's content into a ``data-jc-innerhtml``
      attribute (content that is already hidden is output as is);
    - ``remove``: drop the element together with its content;
    - ``hide_tag``: replace the tag by a ``<jc-t tag="..." attr-k="v">``
      placeholder, keeping the content;
    - ``remove_tag``: drop the tag but keep the content.

    While a hidden or removed element is open, output is collected in
    ``_hidden_output_stack`` instead of the normal output.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._hidden_output_stack = []

    def render_cdata(self, data):
        """Output the text of ``data`` HTML-escaped (double quotes kept)."""
        self._add_output(html_escape(data.get('text') or '', dquote=False))

    def render_htmldata(self, data):
        """Output the text of ``data`` without escaping."""
        self._add_output(data.get('text') or '')

    def render_html_tag(self, data):
        """Shorthand of html_start_tag + cdata + html_end_tag."""
        self.render_html_start_tag(data)
        self.render_cdata(data)
        self.render_html_end_tag(data)

    def render_html_start_tag(self, data):
        """Output ``<tag attrs>`` unless the filter consumed the tag."""
        data = self._hook_filter_tag(data, 'start')
        if data is None:
            return
        self._add_output(f'<{data["tag"]}{self._render_html_attributes(data.get("attrs"))}>')

    def render_html_end_tag(self, data):
        """Output ``</tag>`` unless the filter consumed the tag."""
        data = self._hook_filter_tag(data, 'end')
        if data is None:
            return
        self._add_output(f'</{data["tag"]}>')

    def render_html_startend_tag(self, data):
        """Output a void element ``<tag attrs>`` unless the filter consumed it."""
        data = self._hook_filter_tag(data, 'startend')
        if data is None:
            return
        self._add_output(f'<{data["tag"]}{self._render_html_attributes(data.get("attrs"))}>')

    # --- annotated elements -------------------------------------------------
    # Each of the following elements is rendered as its HTML tag with a
    # <jc-s> bracket mark placed just inside the start and/or end tag.

    def render_parallel_note_start(self, data):
        self._render_open('small', data, '（')

    def render_parallel_note_end(self, data):
        self._render_close('small', data, '）')

    def render_parallel_note_right_start(self, data):
        self.render_html_start_tag({'tag': 'span', **data})

    def render_parallel_note_right_end(self, data):
        self.render_html_end_tag({'tag': 'span', **data})

    def render_parallel_note_left_start(self, data):
        # The separator mark goes before the span, not inside it.
        self._render_mark('／')
        self.render_html_start_tag({'tag': 'span', **data})

    def render_parallel_note_left_end(self, data):
        self.render_html_end_tag({'tag': 'span', **data})

    def render_cutting_note_start(self, data):
        self._render_open('small', data, '（', {'data-rev': '古版-元素'})

    def render_cutting_note_end(self, data):
        self._render_close('small', data, '）', {'data-rev': '古版-元素'})

    def render_footnote_start(self, data):
        self._render_open('small', data, '〔')

    def render_footnote_end(self, data):
        self._render_close('small', data, '〕')

    def render_rubynote_start(self, data):
        self._render_open('small', data, '〘')

    def render_rubynote_end(self, data):
        self._render_close('small', data, '〙')

    def render_marginalnote_start(self, data):
        self._render_open('aside', data, '〚')

    def render_marginalnote_end(self, data):
        self._render_close('aside', data, '〛')

    def render_whiteonblack_start(self, data):
        self._render_open('b', data, '【')

    def render_whiteonblack_end(self, data):
        self._render_close('b', data, '】')

    def render_round_whiteonblack_start(self, data):
        self._render_open('b', data, '【')

    def render_round_whiteonblack_end(self, data):
        self._render_close('b', data, '】')

    def render_bordered_start(self, data):
        self._render_open('b', data, '〖')

    def render_bordered_end(self, data):
        self._render_close('b', data, '〗')

    def render_round_bordered_start(self, data):
        self._render_open('b', data, '〖')

    def render_round_bordered_end(self, data):
        self._render_close('b', data, '〗')

    def render_circled_start(self, data):
        self._render_open('b', data, '〖')

    def render_circled_end(self, data):
        self._render_close('b', data, '〗')

    def render_parenthesized_start(self, data):
        self._render_open('b', data, '〖')

    def render_parenthesized_end(self, data):
        self._render_close('b', data, '〗')

    def _render_mark(self, text, attrs=None):
        """Output a ``<jc-s>`` element containing ``text``."""
        mark = {'tag': 'jc-s'}
        if attrs is not None:
            mark['attrs'] = dict(attrs)
        mark['text'] = text
        self.render_html_tag(mark)

    def _render_open(self, tag, data, mark, mark_attrs=None):
        """Output the start tag of ``tag`` followed by an opening mark."""
        self.render_html_start_tag({'tag': tag, **data})
        self._render_mark(mark, mark_attrs)

    def _render_close(self, tag, data, mark, mark_attrs=None):
        """Output a closing mark followed by the end tag of ``tag``."""
        self._render_mark(mark, mark_attrs)
        self.render_html_end_tag({'tag': tag, **data})

    # --- output and filtering -----------------------------------------------

    def _add_output(self, content):
        """Extended to support hidden output stack."""
        if self._hidden_output_stack:
            target = self._hidden_output_stack[-1]
        else:
            target = self._output
        target.append(content)

    def _render_html_attributes(self, attrs):
        """Return ``attrs`` rendered as `` k="v"`` pairs (with a leading space
        each); attributes listed in HTML_BOOLEAN_ATTRIBUTES are rendered
        without a value."""
        if not attrs:
            return ''
        return ''.join(f' {k}' if k in HTML_BOOLEAN_ATTRIBUTES else f' {k}="{html_escape(v)}"' for k, v in attrs.items())

    def _hook_filter_tag(self, data, type):
        """Apply the filter action matching ``data`` to a tag event.

        Args:
            data (dict): tag data (``tag``, optional ``attrs`` / ``text``).
                It is never modified.
            type (str): ``'start'``, ``'end'`` or ``'startend'``.

        Returns:
            The (possibly replaced) data the caller should render, or None
            when the event has been fully handled here.
        """
        action = self._hook_filter_tag_filter(data)

        # Move the content of the tag into data-jc-innerhtml; if the tag is
        # itself inside hidden content, output it unchanged.
        if action == 'hide':
            if type == 'start':
                self._hidden_output_stack.append([])
                return None
            if type == 'end':
                hidden_output = ''.join(self._hidden_output_stack.pop())
                tag = data['tag']
                if self._hidden_output_stack:
                    attrs_html = self._render_html_attributes(data.get('attrs'))
                    self._add_output(f'<{tag}{attrs_html}>{hidden_output}</{tag}>')
                else:
                    # Work on a copy: the attrs dict belongs to the caller
                    # (typically the lexer's call list) and must not change.
                    attrs = dict(data.get('attrs') or {})
                    if hidden_output:
                        attrs['data-jc-innerhtml'] = hidden_output
                    self._add_output(f'<{tag}{self._render_html_attributes(attrs)}></{tag}>')
                return None
            # 'startend' tags have no content to hide and are rendered as is.

        # Remove the tag together with its content.
        elif action == 'remove':
            if type == 'start':
                self._hidden_output_stack.append([])
                return None
            if type == 'end':
                self._hidden_output_stack.pop()
                return None
            if type == 'startend':
                return None

        # Replace the tag by a placeholder <jc-t tag="..." attr-k="v">...</jc-t>.
        elif action == 'hide_tag':
            attrs = {'tag': data['tag']}
            for k, v in (data.get('attrs') or {}).items():
                attrs[f'attr-{k}'] = v
            data = {'tag': 'jc-t', 'attrs': attrs, 'text': data.get('text')}

            if type == 'startend':
                self._add_output(f'<{data["tag"]}{self._render_html_attributes(data.get("attrs"))}></{data["tag"]}>')
                return None

        # Remove the tag but keep its content.
        elif action == 'remove_tag':
            return None

        return data

    @staticmethod
    def _matches_any(data, patterns):
        """Tell whether ``data`` matches at least one of ``patterns``.

        A pattern matches when its ``tag`` is empty or equal to the tag of
        ``data`` and every entry of its ``attrs`` equals the corresponding
        attribute of ``data``.
        """
        attrs = data.get('attrs') or {}
        for pattern in patterns:
            tag = pattern.get('tag')
            if tag and tag != data['tag']:
                continue
            if all(attrs.get(k) == v for k, v in pattern.get('attrs', {}).items()):
                return True
        return False

    def _hook_filter_tag_filter(self, data):
        """Return the action of the first rule in ``self.filter`` whose
        ``include`` patterns match ``data`` and whose ``exclude`` patterns do
        not, or None when no rule applies.

        A rule without ``include`` patterns never matches.
        """
        for rule in self.filter:
            if not self._matches_any(data, rule.get('include', [])):
                continue
            if self._matches_any(data, rule.get('exclude', [])):
                continue
            return rule.get('action')

        return None


class CharSubstTable(dict):
    """Character substitution table.

    Maps a source string to a dict of replacements.  ``replace`` reads the
    keys ``'古'`` (yielded as type ``ancient``) and ``'今'`` (yielded as type
    ``modern``) of such a dict.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the table and sanitise its entries.

        Entries whose key is not a str or whose value is not a dict are
        dropped, and so are inner items whose key or value is not a str.
        Inner dicts are stored as filtered copies, so dicts owned by the
        caller are never modified.
        """
        super().__init__(*args, **kwargs)

        for key, mapping in list(self.items()):
            if not isinstance(key, str) or not isinstance(mapping, dict):
                del self[key]
                continue

            self[key] = {
                k: v for k, v in mapping.items()
                if isinstance(k, str) and isinstance(v, str)
            }

    @cached_property
    def trie(self):
        """Generate a prefix tree of the dictionary.

        Every key is split into Unicode composites, one tree level per
        composite; the replacement dict of a key is stored under ``''`` in the
        node reached by its last composite.

        NOTE(review): the tree is cached, so changes made to the table after
        the first access are presumably not reflected -- confirm against the
        project's ``cached_property``.
        """
        trie_obj = {}
        for key, value in self.items():
            node = trie_obj
            for composite in Unicode.split(key):
                node = node.setdefault(composite, {})
            node[''] = value
        return trie_obj

    def replace(self, text):
        """Apply the substitution to ``text`` and yield the result in pieces,
        merging consecutive pieces of the same type.

        Args:
            text: a string to be converted.

        Yields:
             tuple: (text, text_type), where text_type is ``'text'`` for
             unchanged text, ``'ancient'`` for a ``'古'`` replacement and
             ``'modern'`` for a ``'今'`` replacement.  A match offering both
             yields the ancient piece first; a match offering neither yields
             nothing.
        """
        def pieces():
            for m in self.apply(text):
                if isinstance(m, str):
                    yield m, 'text'
                    continue
                if '古' in m:
                    yield m['古'], 'ancient'
                if '今' in m:
                    yield m['今'], 'modern'

        buffer = []
        buffer_type = None
        for data, data_type in pieces():
            if data_type != buffer_type:
                if buffer:
                    yield ''.join(buffer), buffer_type
                buffer = []
                buffer_type = data_type
            buffer.append(data)

        if buffer:
            yield ''.join(buffer), buffer_type

    def apply(self, parts):
        """Apply conversion for the given text parts

        Args:
            parts: a string or iterable parts to be converted.  A string is
                split into Unicode composites first.

        Yields:
            A string or a dict of a conversion.
        """
        if isinstance(parts, str):
            parts = Unicode.split(parts)
        elif not isinstance(parts, list):
            parts = list(parts)

        i = 0
        total = len(parts)
        while i < total:
            match = self.match(parts, i)
            if match is None:
                yield parts[i]
                i += 1
            else:
                m_dict, _m_pos, m_end = match
                yield m_dict
                i = m_end

    def match(self, parts, pos):
        """Match a unicode composite at pos.

        The longest key of the table that starts at ``pos`` wins.

        Args:
            parts: indexable sequence of parts to be matched.
            pos: index in ``parts`` at which the match must start.

        Returns:
            a tuple of (match_dict, match_pos, match_end) or None if no match.
        """
        node = self.trie
        match = None
        match_end = None
        for i in range(pos, len(parts)):
            key = parts[i]
            if key not in node:
                break
            node = node[key]
            if '' in node:
                match = node['']
                match_end = i + 1
        if match is None:
            return None
        return match, pos, match_end


class Unicode():
    """Utilities for Unicode string handling."""

    # Inclusive code point ranges of characters that attach to the character
    # before them.
    _POSTFIX_COMPOSER_RANGES = (
        (0xFE00, 0xFE0F),      # variation selectors
        (0xE0100, 0xE01EF),    # variation selectors supplement
        (0x180B, 0x180D),      # Mongolian free variation selectors
        (0x0300, 0x036F),      # combining diacritical marks
        (0x1AB0, 0x1AFF),      # combining diacritical marks extended
        (0x1DC0, 0x1DFF),      # combining diacritical marks supplement
        (0x20D0, 0x20FF),      # combining diacritical marks for symbols
        (0xFE20, 0xFE2F),      # combining half marks
    )

    @staticmethod
    def _is_postfix_composer(code):
        """Tell whether code point ``code`` attaches to the preceding character."""
        return any(low <= code <= high for low, high in Unicode._POSTFIX_COMPOSER_RANGES)

    @staticmethod
    def composite_length(text, pos):
        """Get the length of the Unicode composite at pos.

        A unicode composite is a complex of characters with composers.
        For example, an ideographic description sequence (IDS),
        or a hanzi with a variant selector (VS), etc.

        Args:
            text (str): the text to examine.
            pos (int): index of the first character of the composite.

        Returns:
            int: number of characters belonging to the composite; a composite
            that is cut short by the end of ``text`` ends there, and the
            result is 0 when ``pos`` is not inside ``text``.
        """
        i = pos
        total = len(text)
        # Number of characters still to be consumed, the current one included.
        pending = 1

        while pending and i < total:
            code = ord(text[i])

            # A prefix composer requires further characters after itself.
            if code == 0x303E:
                # ideographic variation indicator
                pending += 1
            elif 0x2FF0 <= code <= 0x2FF1 or 0x2FF4 <= code <= 0x2FFB:
                # IDS binary operator
                pending += 2
            elif 0x2FF2 <= code <= 0x2FF3:
                # IDS trinary operator
                pending += 3

            # A postfix composer right after this character belongs to it.
            if i + 1 < total and Unicode._is_postfix_composer(ord(text[i + 1])):
                pending += 1

            i += 1
            pending -= 1

        return i - pos

    @staticmethod
    def split(text):
        """Split a text into a list of Unicode composites."""
        i = 0
        total = len(text)
        result = []
        while i < total:
            length = Unicode.composite_length(text, i)
            result.append(text[i:i + length])
            i += length
        return result
